Racing
Data visualisation
Interactive
Author

Lorenzo Mattioli

Published

10 settembre 2024

Formula 1

My whole family has been forever fascinated with car racing. This short exercise is therefore somewhat of a homage to the discipline.

The following charts were the excuse to try out the packages highcharter (for drilldown graphs) and gt (for tables). Both were heavily inspired by Tanya Shapiro’s work. The data used for this project can be found on Kaggle.

Setup code
# Libraries
packages <- c('tidyverse','MetBrewer','highcharter', 'gt', 'gtExtras')
install.packages(setdiff(packages, rownames(installed.packages()))) 
library(gt)
library(gtExtras)
library(tidyverse)
library(MetBrewer)
library(highcharter)
rm(packages)


# Importing data
drivers <- read_csv("archive/drivers.csv", show_col_types = F)
results <- read_csv('archive/results.csv', show_col_types = F)
flags <- read_csv("flags_iso.csv")

# Data cleaning
## setting mixed nationalities to first one
drivers$nationality <- gsub('-Italian', '', drivers$nationality)
drivers$nationality <- gsub('East German', 'German', drivers$nationality)
## recoding NAs in number column
drivers$number <- ifelse(drivers$number == '\\N', NA, drivers$number)

# Defining wins and podiums vectors
wins <- 
results |> 
  filter(position == 1) |> 
  count(driverId) |> 
  rename(wins = n)
podiums <- 
  results |> 
  filter(position == 1 | position == 2 | position == 3) |> 
  count(driverId) |> 
  rename(podiums = n)

# Merging with driver data
drivers <- full_join(drivers, wins)
drivers <- full_join(drivers, podiums)
rm(wins, podiums)

# Creating full name column
drivers <- drivers |> 
  mutate(name = paste(forename, surname, sep = ' '))

# Defining continent variable
africa <- c('Algerian','Angolan','Beninese','Batswana','Burkinabe','Burundian','Cameroonian','Cape Verdean','Central African','Chadian','Comoran','Congolese (Democratic Republic of the Congo)','Congolese (Republic of the Congo)','Ivorian (Côte d’Ivoire)','Djiboutian','Egyptian','Equatorial Guinean','Eritrean','Ethiopian','Gabonese','Gambian','Ghanaian','Guinean','Guinea-Bissauan','Kenyan','Lesothoan','Liberian','Libyan','Malagasy','Malawian','Malian','Mauritanian','Mauritian','Moroccan','Mozambican','Namibian','Nigerien','Nigerian','Rwandan','Sahrawi','Saint Helenian','Sao Tomean','Senegalese','Seychellois','Sierra Leonean','Somali','South African','South Sudanese','Sudanese','Swazi','Tanzanian','Togolese','Tunisian','Ugandan','Zambian','Zimbabwean')

asia <- c('Afghan','Armenian','Azerbaijani','Bahraini','Bangladeshi','Bhutanese','Bruneian','Cambodian','Chinese','Cypriot','Filipino','Georgian','Indian','Indonesian','Iranian','Iraqi','Israeli','Japanese','Jordanian','Kazakhstani','Kuwaiti','Kyrgyzstani','Laotian','Lebanese','Malaysian','Maldivian','Mongolian','Myanmar (Burmese)','Nepalese','North Korean','Omani','Pakistani','Palestinian','Philippine','Qatari','Russian','Saudi Arabian','Singaporean','South Korean','Sri Lankan','Syrian','Taiwanese','Tajikistani','Thai','Timorese','Turkish','Turkmen','Emirati','Uzbekistani','Vietnamese','Yemeni')

europe <- c('Albanian','Andorran','Austrian','Belarusian','Belgian','Bosnian','Bulgarian',
'Croatian','Cypriot','Czech','Danish','Estonian','Finnish','French','German', 'East German', 'Rhodesian', 'Greek','Hungarian','Icelandic','Irish','Italian','Kosovar','Latvian','Liechtensteiner','Lithuanian','Luxembourgish','Macedonian','Maltese','Moldovan','Monegasque','Montenegrin','Dutch','Norwegian','Polish','Portuguese','Romanian','Russian','Sammarinese','Serbian','Slovak','Slovenian','Spanish','Swedish','Swiss','Ukrainian','British','Vatican City State (Holy See)')

namerica <- c('American','Antiguan and Barbudan','Bahamian','Barbadian','Belizean','Canadian','Costa Rican','Cuban','Dominican (Dominican Republic)','Dominican (Dominica)','Salvadoran','Grenadian','Guatemalan','Haitian','Honduran','Jamaican','Kittitian and Nevisian','Lucian','Mexican','Nicaraguan','Panamanian','Saint Vincent and the Grenadines','Trinidadian and Tobagonian')

samerica <- c('Argentine','Bolivian','Brazilian','Chilean','Colombian','Ecuadorian','Guyanese','Paraguayan','Peruvian','Surinamese','Uruguayan','Venezuelan')

oceania <- c('Australian','Fijian','Kiribati','Marshallese','Micronesian','Nauruan','New Zealander','Palauan','Papua New Guinean','Samoan','Solomon Islander','Tongan','Tuvaluan','Vanuatuan')

drivers$continent <- ifelse(drivers$nationality %in% africa, 'African',
                       ifelse(drivers$nationality %in% asia, 'Asian',
                              ifelse(drivers$nationality %in% europe, 'European',
                                     ifelse(drivers$nationality %in% namerica, 'North American',
                                            ifelse(drivers$nationality %in% samerica, 'South American',
                                                   ifelse(drivers$nationality %in% oceania, 'Oceanian', NA))))))

rm(asia, africa, europe, namerica, samerica, oceania)


# Creating graphics df
## total drivers by nationality
by_cont <- drivers |> 
  group_by(continent) |> 
  summarise(drivers = n())
## Nest continent <- nationality <- wins
by_nat <- drivers |> 
  group_by(continent, nationality) |> 
  summarise(drivers = n()) |> 
  group_nest(continent) |> 
  mutate(
    id = continent,
    type= 'pie',
    data = purrr::map(data, mutate, name = nationality, y  = drivers, ),
    data = purrr::map(data, list_parse)
  )

Driver nationality

F1 drivers come from all over the world: all continents are or have been represented in the driver’s championship. However, not every one of them is equally represented. The interactive graph below displays the distribution of the drivers’ home countries.

Drilldown chart code
# Making basic pie chart
donut_chart <- by_cont |>
  hchart('pie',
         hcaes(x = continent, y = drivers, drilldown = continent),
         name = 'Drivers'
         ) |> 
  hc_plotOptions(pie = list(innerSize = '70%')) |> 
  hc_title(text = 'Number of F1 drivers by geographical ')

# Making drilldown active
drilldown_chart <- donut_chart |> 
  hc_drilldown(
    #map to data
    series = list_parse(by_nat),
    allowPointDrilldown = TRUE,
    #set stylings of data labels that offer drill down views
    activeDataLabelStyle = list(
      textDecoration="none",
      color="black"
    )
  )

custom_theme <- hc_theme(
  colors = met.brewer('Degas'),
  chart = list(
    backgroundColor = NULL
  ),
  title = list(
    style = list(
      color = "#333333",
      fontFamily = "Archivo",
      fontWeight="bold"
    )
  ),
  xAxis = list(
    labels=list(style = list(
      color = "#666666",
      fontFamily = "Archivo"
    ))
  ),
  yAxis = list(
    labels=list(style = list(
      color = "#666666",
      fontFamily = "Archivo"
    ))
  ),
  tooltip = list(
    style = list(
      fontFamily = "Archivo"
    )
  ),
  plotOptions = list(
    series = list(
      dataLabels = list(style=list(fontFamily = "Archivo")
      ))
  )
)
drilldown_chart |> 
  hc_add_theme(custom_theme)

Driver stats

Code prep
# Building df
wintable <-  drivers |>
  select(name, nationality, wins, podiums) |> 
  filter(!is.na(wins)) |>
  arrange(desc(wins)) |>
  slice(1:20)
podtable <-  drivers |>
  select(name, nationality, podiums, wins) |> 
  filter(!is.na(podiums)) |>
  arrange(desc(podiums)) |>
  slice(1:20)

wintable$nationality <- c('GB', 'DE', 'NL', 'DE', 'FR', 'BR', 'ES', 'GB', 'GB', 'AT', 'GB', 'AR', 'DE', 'BR', 'GB', 'FI', 'FI', 'GB', 'GB', 'BR')
podtable$nationality <- c('GB', 'DE', 'DE', 'NL', 'ES', 'FR', 'FI', 'BR', 'BR', 'FI', 'GB', 'BR', 'GB', 'DE', 'AT', 'FI', 'GB', 'AT', 'AR', 'GB')

tottable <- full_join(wintable, podtable)
# Country flag images
flags <- flags |> 
  select(URL, `Alpha-2 code`)
## merge
wintable <- left_join(wintable, flags, by = join_by('nationality' == `Alpha-2 code`)) |> 
  select(name, URL, wins) |> 
  rename(nationality = URL)
podtable <- left_join(podtable, flags, by = join_by('nationality' == `Alpha-2 code`)) |> 
  select(name, URL, podiums) |> 
  rename(Name = name,
         Nationality = URL,
         Podiums = podiums)
tottable <- left_join(tottable, flags, by = join_by('nationality' == `Alpha-2 code`)) |>
  select(name, URL, wins, podiums) |> 
  rename(Name = name,
         Nationality = URL,
         Podiums = podiums,
         Wins = wins)

dtable <- bind_cols(wintable, podtable)
rm(wintable, podtable)

The rankings of the most podiums and the most wins do not necessarily coincide. Below are two tables with these two rankings. Below is a web-friendly, responsive table with the corresponding data. In the source code, a print-ready static table is also available.

Table design
# # Static table
# dtable |> 
#   gt() |> 
#   tab_header(
#     title = md("## Top ten F1 drivers")
#   ) |> 
#   gt_img_rows(columns = Nationality, height = 15) |> 
#   gt_img_rows(columns = nationality, height = 15) |> 
#   cols_align(
#     align = "center",
#     columns = 2:3) |> 
#   cols_align(
#     align = 'center',
#     columns = 4:5
#   ) |> 
#   tab_spanner(label = 'Most wins',
#               columns = 2:3) |> 
#   tab_spanner(label = 'Most podiums',
#               columns = 5:6) |>
#   gt_theme_538()

# Responsive table
tottable |> 
  gt(rowname_col = 'Name') |> 
  tab_header(title = html('<h2>Top F1 drivers</h2>'),
             ) |> 
  fmt_image(Nationality, height = 15) |> 
  cols_align(align = 'center', columns = 2:4) |> 
  opt_interactive(use_pagination = T,
                  use_sorting = T) |> 
  tab_options(table.background.color = 'white',
              table.font.style = 'Roboto',
              table.border.top.color = 'white',
              heading.align = 'center',
              row.striping.include_table_body =FALSE,
              heading.border.bottom.color = "white",
              row_group.border.bottom.color = "white",
              row_group.border.top.color = "white"
              )

Top F1 drivers